library(dplyr)
library(tidyverse)
library(ggplot2)
library(ggpubr)
library(ggthemes)
library(stats)
library(moments)
library(grid)
library(formattable)
library(gridExtra)
library(ggsignif)
library(hrbrthemes)
library(plotrix)
library(rstatix)
library(car)
library(plotly)
data <- read.csv("/Users/nuriteliash/Documents/GitHub/Sofia-varroa-project/EAG_data_all.csv")
the tested effect (x) is the Essential oil “EO” dissolved in acetone, the “response” (y) is the electrophysiological response of the leg measured in mV. Each leg was stimulated by different stimuli in the following order:Air > Air > Air > Acetone (solvent) > 0.1 > 0.25 > 0.5 > 1 > 2.5 > 5 > Acetone (solvent) > Air
the response was normalized…. [please complete in here:)]
to test the difference in response amplitude to the different stimuli, we will use One way ANOVA. then a post-hoc Tukey test, to see which of the stimuli are significantly differnet. we followed this tutorial for ANOVA in R
# our data contains:
str(data)
## 'data.frame': 664 obs. of 4 variables:
## $ leg : Factor w/ 83 levels "VF1","VF10","VF11",..: 1 1 1 1 1 1 1 1 12 12 ...
## $ stimuli : Factor w/ 8 levels "0.1","0.25","0.5",..: 8 1 2 3 4 5 6 7 8 1 ...
## $ EO : Factor w/ 9 levels "EO4309","EO4444",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ response: num 109.5 118.5 86.6 131.9 108.5 ...
table(data$stimuli, data$EO)
##
## EO4309 EO4444 EO4891 EO4899 EO5068 EO5663 EO5714 EO5749 EO5763
## 0.1 8 10 10 10 11 9 9 9 7
## 0.25 8 10 10 10 11 9 9 9 7
## 0.5 8 10 10 10 11 9 9 9 7
## 1 8 10 10 10 11 9 9 9 7
## 2.5 8 10 10 10 11 9 9 9 7
## 5 8 10 10 10 11 9 9 9 7
## acet_after 8 10 10 10 11 9 9 9 7
## acet_before 8 10 10 10 11 9 9 9 7
We have tested a total of 9 essential oils, using 83 legs. Each essential oil was tested on at least 7 different legs.
Outliers <- data %>%
group_by(stimuli,EO) %>%
identify_outliers(response)
Outliers
## # A tibble: 50 x 6
## stimuli EO leg response is.outlier is.extreme
## <fct> <fct> <fct> <dbl> <lgl> <lgl>
## 1 0.1 EO4444 VF2 207. TRUE FALSE
## 2 0.1 EO4891 VF78 44.2 TRUE FALSE
## 3 0.1 EO4891 VF79 182. TRUE FALSE
## 4 0.1 EO4891 VF82 171. TRUE FALSE
## 5 0.1 EO5068 VF33 139. TRUE FALSE
## 6 0.1 EO5068 VF34 180. TRUE TRUE
## 7 0.1 EO5749 VF55 161. TRUE FALSE
## 8 0.25 EO4444 VF2 183. TRUE FALSE
## 9 0.25 EO4899 VF21 236. TRUE TRUE
## 10 0.25 EO4899 VF29 49.4 TRUE FALSE
## # … with 40 more rows
# plot it to detect outliers by specific leg
#data %>%
#ggboxplot(x = "stimuli", y = "response", color = "leg")
#the dependent variable should be approximately normally distributed in each cell of the design. This can be checked using the Shapiro-Wilk normality test (shapiro_test()
normality <- data %>%
group_by(stimuli,EO) %>%
shapiro_test(response)
normality
## # A tibble: 72 x 5
## stimuli EO variable statistic p
## <fct> <fct> <chr> <dbl> <dbl>
## 1 0.1 EO4309 response 0.883 0.200
## 2 0.1 EO4444 response 0.852 0.0622
## 3 0.1 EO4891 response 0.937 0.518
## 4 0.1 EO4899 response 0.942 0.581
## 5 0.1 EO5068 response 0.818 0.0165
## 6 0.1 EO5663 response 0.775 0.0106
## 7 0.1 EO5714 response 0.989 0.994
## 8 0.1 EO5749 response 0.968 0.880
## 9 0.1 EO5763 response 0.889 0.270
## 10 0.25 EO4309 response 0.940 0.606
## # … with 62 more rows
# looks like that for a few EO x stimuli combinations the response doesnt distribute normally (p<0.05)
# however, as we have enough replicates (n=10), it is ok to contniue with ANOVA
# Build the linear model
model <- lapply(split(data, data$EO), function(i){
lm(response ~ stimuli, data = i)
})
# now you can Create a QQ plot of residuals of each EO, for example, essential oil EO4309:
ggqqplot(residuals(model$EO4309))
plot(model$EO4309, 1)
in my opinion, if you detect an outlier, you should take out the entire leg, and then check again the normality
as we wish to know the difference in response to stimuli, in each EO, we will use one way Anova, after spliting the data per EO:
anova_perEO <- lapply(split(data, data$EO), function(i){
anova(lm(response ~ stimuli, data = i))
})
anova_perEO
## $EO4309
## Analysis of Variance Table
##
## Response: response
## Df Sum Sq Mean Sq F value Pr(>F)
## stimuli 7 1482 211.75 0.3499 0.9269
## Residuals 56 33894 605.24
##
## $EO4444
## Analysis of Variance Table
##
## Response: response
## Df Sum Sq Mean Sq F value Pr(>F)
## stimuli 7 5452 778.86 0.7103 0.6634
## Residuals 72 78951 1096.54
##
## $EO4891
## Analysis of Variance Table
##
## Response: response
## Df Sum Sq Mean Sq F value Pr(>F)
## stimuli 7 10606 1515.2 0.7921 0.5963
## Residuals 72 137723 1912.8
##
## $EO4899
## Analysis of Variance Table
##
## Response: response
## Df Sum Sq Mean Sq F value Pr(>F)
## stimuli 7 6311 901.6 0.5624 0.7839
## Residuals 72 115422 1603.1
##
## $EO5068
## Analysis of Variance Table
##
## Response: response
## Df Sum Sq Mean Sq F value Pr(>F)
## stimuli 7 16440 2348.6 1.7911 0.1004
## Residuals 80 104897 1311.2
##
## $EO5663
## Analysis of Variance Table
##
## Response: response
## Df Sum Sq Mean Sq F value Pr(>F)
## stimuli 7 17143 2449.0 1.4552 0.1994
## Residuals 64 107705 1682.9
##
## $EO5714
## Analysis of Variance Table
##
## Response: response
## Df Sum Sq Mean Sq F value Pr(>F)
## stimuli 7 17420 2488.6 0.6658 0.7
## Residuals 64 239210 3737.7
##
## $EO5749
## Analysis of Variance Table
##
## Response: response
## Df Sum Sq Mean Sq F value Pr(>F)
## stimuli 7 4250 607.08 0.4378 0.8748
## Residuals 64 88752 1386.75
##
## $EO5763
## Analysis of Variance Table
##
## Response: response
## Df Sum Sq Mean Sq F value Pr(>F)
## stimuli 7 249.4 35.63 0.0885 0.9987
## Residuals 48 19324.4 402.59
to test the significant difference of each stimuli concentration from the solvent
tukey <- data %>%
group_by(EO) %>%
tukey_hsd(response ~ stimuli)
tukey
## # A tibble: 252 x 10
## EO term group1 group2 null.value estimate conf.low conf.high p.adj
## * <fct> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 EO4309 stimuli 0.1 0.25 0 8.73 -30.0 47.5 0.996
## 2 EO4309 stimuli 0.1 0.5 0 13.1 -25.6 51.9 0.961
## 3 EO4309 stimuli 0.1 1 0 14.8 -23.9 53.5 0.928
## 4 EO4309 stimuli 0.1 2.5 0 11.3 -27.4 50.1 0.983
## 5 EO4309 stimuli 0.1 5 0 11.1 -27.6 49.8 0.985
## 6 EO4309 stimuli 0.1 acet_after 0 3.39 -35.3 42.1 1
## 7 EO4309 stimuli 0.1 acet_befo… 0 5.22 -33.5 43.9 1
## 8 EO4309 stimuli 0.25 0.5 0 4.41 -34.3 43.1 1
## 9 EO4309 stimuli 0.25 1 0 6.06 -32.7 44.8 1
## 10 EO4309 stimuli 0.25 2.5 0 2.62 -36.1 41.3 1
## # … with 242 more rows, and 1 more variable: p.adj.signif <chr>
for some reason, i cannot add the leg ID to the hovering text in the box plot, but i can do it in the dot-plot. so in order to detect outliers, we can detect them in the first plot (the box plot), then identify their ID leg in the dot plot:)
# first sort the order of stimuli
data$stimuli <- factor(data$stimuli,levels = c("acet_before", "0.1", "0.25", "0.5", "1", "2.5", "5","acet_after"))
box <- ggplot(data, aes(x = stimuli, y = response)) +
geom_boxplot(aes(colour = EO)) +
facet_wrap( ~ EO) +
theme_linedraw() +
ggtitle("Varroa foreleg response to different essential oils") +
xlab("Stimuli amount (microgram)") +
ylab("Normalized response (%)") +
theme(axis.text.x=element_text(angle=45, hjust=1))
ggplotly(box, tooltip = "leg")
dot <- ggplot(data, aes(x = stimuli, y = response, colour = EO, leg=leg)) +
geom_point() +
facet_wrap( ~ EO) +
theme_linedraw() +
ggtitle("Varroa foreleg response to different essential oils") +
xlab("Stimuli amount (microgram)") +
ylab("Normalized response (%)") +
theme(axis.text.x=element_text(angle=45, hjust=1))
ggplotly(dot, tooltip = c("leg","response"))